# -*- coding: utf-8 -*-
import scrapy

from itcast.items import ItcastItem


class GushiwenSpider(scrapy.Spider):
    """Crawl the author listing on so.gushiwen.cn, one item per author entry.

    Crawling begins at ``start_urls`` using the start-request behaviour
    inherited from ``scrapy.Spider``; each listing page is handled by
    :meth:`parse`, which also follows the pagination link.
    """

    name = 'gushiwen'
    allowed_domains = ['gushiwen.cn']
    start_urls = ['https://so.gushiwen.cn/authors/']

    def parse(self, response):
        """Extract author entries from a listing page and follow pagination.

        Args:
            response: The downloaded listing page.

        Yields:
            An ``ItcastItem`` with ``name`` and ``intro`` for every entry
            under ``#leftZhankai`` that has a name node, then a follow-up
            request (handled by this same method) when a pagination link
            with a non-empty ``href`` is present.
        """
        for author in response.xpath('//*[@id="leftZhankai"]/div'):
            name = author.xpath('div[1]/p[1]/a[1]/b/text()').get()
            if name is None:
                # A child <div> without the expected name node is not an
                # author entry (presumably a layout/pagination wrapper);
                # skip it instead of emitting an item with no name.
                continue
            yield ItcastItem({
                'name': name,
                # May be None when the entry has no intro paragraph text.
                'intro': author.xpath('div[1]/p[2]/text()').get(),
            })

        # NOTE(review): assumes the first <a> inside #FromPage's <div> is the
        # "next page" link -- confirm against the live markup.
        next_page = response.xpath(
            '//*[@id="FromPage"]/div/a[1]'
        ).attrib.get('href')
        # Truthiness check: a missing link yields None and an empty href
        # would only re-request the current page.
        if next_page:
            yield response.follow(next_page, callback=self.parse)
